
**********************************************************
*** COVID-19 / RKI ***************************************
**********************************************************

/* 
Covid-19 data set has been downloaded from:
https://www.arcgis.com/home/item.html?id=f10774f1c63e40168479a1feb6c7ca74
https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0/explore?showTable=true
*/


*** Import downloaded data set
* Reads the comma-separated RKI file from the orig_data folder as UTF-8.
* varnames(1) takes the variable names from the first row of the file;
* clear discards whatever data are currently in memory.
import delimited using "${orig_data}RKI_COVID19.csv", ///
    delimiters(",") varnames(1) encoding("utf-8") clear

*** Code negative values as missing
* The list of affected variables is stored in the global macro vars so that
* it stays defined after this step.
global vars anzahlfall anzahltodesfall anzahlgenesen neuerfall neuertodesfall neugenesen

* Every value below zero in the listed variables is replaced by system missing.
foreach v in $vars {
    replace `v' = . if `v' < 0
}


********************
*** Data Cleaning **
********************

*** Bring the dates into a format that can be used later on
* For each string date a numeric daily date with suffix 2 is created and
* displayed as %td. In the mask YMD### the three # tell date() to ignore
* the three elements that follow year, month and day in the string
* (presumably the time of day; confirm against the raw file).
foreach stem in meldedatum refdatum {
    generate `stem'2 = date(`stem', "YMD###")
    format %td `stem'2
}

label variable meldedatum2 "Datum der Meldung"
label variable refdatum2   "Referenzdatum"

*** Rename some variables
* Group rename: the n-th name in the first list becomes the n-th name in the
* second list.
rename (anzahlfall anzahltodesfall anzahlgenesen neuerfall neuertodesfall neugenesen) ///
       (cases      casualties      recovered     cases_new casualties_new recovered_new)

*** Sort data set
* Sort by reporting date, then by county id.
* The earlier version sorted on a variable named date, which this script
* never creates. Depending on the varabbrev setting that either stops with
* an error or silently resolves to some other variable whose name starts
* with those letters. The parsed reporting date is meldedatum2, so it is
* named explicitly here.
sort meldedatum2 idlandkreis

*** Order data set
* Identifiers first, then each raw date string followed by its parsed %td
* counterpart, then the demographic variables, the counts and the onset flag.
* Variables not listed keep their relative order after these.
order idbundesland bundesland idlandkreis landkreis ///
      meldedatum meldedatum2 refdatum refdatum2 ///
      altersgruppe altersgruppe2 geschlecht ///
      cases casualties recovered cases_new casualties_new recovered_new ///
      isterkrankungsbeginn


*** Save county ids
* Writes a lookup table with one row per idlandkreis and restores the full
* data set afterwards.

preserve
keep idbundesland bundesland idlandkreis landkreis
* Keep the first row of every idlandkreis group. bysort sorts by the id and
* numbers the rows within each group, so no comparison with the previous
* observation is needed. The earlier test against the row before compared
* observation 1 with an out-of-range subscript, which evaluates to missing
* and would have dropped a first row whose id is itself missing.
bysort idlandkreis: keep if _n == 1
save "${altered_data}landkreis_ids.dta", replace
restore


*** Save the cleaned observation-level data set
save "${altered_data}RKI_COVID19", replace


*** County data set by meldedatum
* Collapse to one row per combination of meldedatum2, idlandkreis,
* altersgruppe and geschlecht. The six count variables are summed within
* each cell; for the state id, state name and county name the value of the
* last observation in the cell is carried over.
* This replaces the data in memory with the aggregated table.
collapse ///
    (sum)  cases casualties recovered cases_new casualties_new recovered_new ///
    (last) idbundesland bundesland landkreis, ///
    by(meldedatum2 idlandkreis altersgruppe geschlecht)

save "${altered_data}kreis_age_gender_daily_meldedatum.dta", replace
